clear
do "...\First.do"

* This file creates the full-population dataset and the main analysis dataset
********************************************************************************

* Start from the patient population
use "$work\patient_background.dta", clear

* Attach the patient-GP match for each patient-year
capture drop _merge
merge 1:1 pnr year using "$work\patient_gp_year.dta"
tabulate year _merge, row
* Keep master-only and matched rows; rows found only in the GP file are discarded
keep if _merge != 2
drop _merge

* Add information about clinic closures (matched on clinic ID only)
merge m:1 ydernr using "$work\gp_closures_year.dta", keep(master match) nogenerate

sort pnr year
order pnr year ydernr GP_from_month GP_until_month


** Add GP SES for each year plus other GP characteristics at the clinic level
merge m:1 ydernr year using "$work\gp_ses.dta", keep(master match) nogenerate



********************************************************************************
** Identify patients affected by a clinic closure
** The closing year is the latest year in which the clinic had consultations,
** or the year prior, since a closure must be announced 6 months in advance.
sort pnr year

capture drop help
capture drop help2
* help: this row's year equals year_max of the row's clinic
generate help = (year_max == year)
* help2: the previous row belongs to the same patient, its year_max equals the
* current year, and its GP_until_month falls in that same year
* NOTE(review): year() expects a daily date - confirm GP_until_month is stored as one
generate help2 = 1 if pnr == pnr[_n-1] & year == year_max[_n-1] & year_max[_n-1] == year(GP_until_month[_n-1])

* First closure a patient experiences: help2 takes precedence over help
generate help_year = cond(help2 == 1, year_max[_n-1], cond(help == 1, year_max, .))
bysort pnr: egen min_year = min(help_year)
drop help help2

label variable min_year "Year of clinic closure"

* Timing relative to the clinic closure
generate timing = year - min_year
tabulate timing

* post is 1 when timing is above zero and missing when timing is missing
generate post = (timing > 0) if timing != .
* sample flags rows with timing between -4 and 5 (0 when timing is missing)
generate sample = inrange(timing, -4, 5)

order pnr year ydernr timing

* Numeric panel identifier built from pnr
egen id = group(pnr)
xtset id year


*******************************************************************************
* Variable labels

* Patient-level variables
label var year "Year"
label var ydernr "Doctor ID"
label var timing "Timing"
label var kom "Municipality code"
label var male "Male"
label var dk "Ethnic Danish"
label var married "Married"
label var age "Age"
label var statins "Statins"
* NOTE(review): low_ses is labelled "High school" - confirm the label matches the coding
label var low_ses "High school"
* NOTE(review): name says stroke, label says cardiovascular disease - confirm which is right
label var death_stroke "Death from cardiovascular disease"
label var yod "Year of death"
label var death "Death"
label var ses_missing "Missing information about education"

* Clinic-level variables
label var N_doctors "Number of doctors in clinic"
label var yder_ses_missing "Doctor: Missing family education"
label var yder_ses_max "Doctor: Education max"
label var yder_ses_min "Doctor: Education min"
label var yder_ses_mean "Doctor: Education mean"
label var ku "University of Copenhagen"
label var au "Aarhus University"
label var sdu "University of Southern Denmark"
label var other "Other University"


label var mean_age "GP: Age"
label var mean_male "GP: Male"
label var mean_dk "GP: Danish"
label var solo "Solo practice"
label var post "Post closure"




* Store the full sample
save "$work\full_sample.dta", replace


********************************************************************************
********************************************************************************

* Reload the full sample and keep only rows where timing is defined
use "$work\full_sample.dta", clear

keep if timing != .

********************************************************************************
* Difference-in-differences with fixed effects
********************************************************************************

* New GP SES: yder_ses_max from the timing==1 row, copied to every row of the
* same pnr and min_year
capture drop help
capture drop help3
bysort pnr min_year: egen help3 = max(cond(timing == 1, yder_ses_max, .))
* Interaction with post: zero where post==0, otherwise equal to help3
generate interaction = cond(post == 0, 0, help3)

* NOTE(review): help3 holds yder_ses_max - confirm that it is a low-SES dummy as labelled
label variable help3 "Dummy new GP low SES"
label variable interaction "Dummy new GP low SES x post"
* Drop rows where the new GP measure is missing
keep if help3 != .


* Creating new GP characteristics
* Each variable in gp_X is replaced by its value in the patient's timing==1 row,
* copied to all rows of that patient; the row's own value is kept as old_<var>.
global gp_X "mean_age mean_male mean_dk solo ku au sdu other N_doctors"

foreach var of global gp_X {
	capture drop help_`var'
	* Spread each variable's own timing==1 value within pnr. This does not rely
	* on a sort by help_mean_age, so a missing mean_age at timing==1 can no
	* longer leave the other characteristics partly filled or order-dependent.
	bysort pnr: egen help_`var' = max(cond(timing == 1, `var', .))
	rename `var' old_`var'
	rename help_`var' `var'
}


*** Fixed effects
* Old GP fixed effects: ydernr from the timing==-1 row, copied to all rows of the patient
generate gp_fe = ydernr if timing == -1
* Empty strings sort first, so a filled value (if any) is the last one within pnr
bysort pnr (gp_fe): replace gp_fe = gp_fe[_N]
sort pnr year
egen fe_gp = group(gp_fe)

* New GP: ydernr from the timing==1 row, copied to all rows of the patient
generate new_gp = ydernr if timing == 1
bysort pnr (new_gp): replace new_gp = new_gp[_N]

* New GP x closure year groups
egen newgpyr = group(new_gp min_year)

* Old x new GP fixed effects
sort pnr year
egen old_new_gp = group(gp_fe new_gp)

* Flag patients whose timing==1 row has yder_ses_missing==0 (new clinic with known SES)
generate known_new2 = (timing == 1 & yder_ses_missing == 0)
bysort pnr: egen _known_new2 = max(known_new2)
tabulate _known_new2

* Flag patients whose timing==1 row has yder_ses_missing==1 (new clinic with unknown SES)
generate unknown_new = (timing == 1 & yder_ses_missing == 1)
bysort pnr: egen _unknown_new2 = max(unknown_new)
tabulate _unknown_new2


*******************************************************************************
* Final sample restrictions and output

* Timing shifted by 5
generate t = timing + 5

* Keep years up to 2019, timing -4 to 5, closure years 1999-2016 and ages 30-80
keep if year <= 2019 & inrange(timing, -4, 5) & inrange(min_year, 1999, 2016) & inrange(age, 30, 80)


save "$work\analysis_sample.dta", replace


* One row per patient ID in the analysis sample
keep pnr
duplicates drop
save "$work\analysis_sample_pnr.dta", replace
